/**
 * Simplified-Chinese UI strings for the data pipelines screens, keyed by
 * message id.
 *
 * Conventions visible in this table:
 * - Tokens in braces (`{field}`, `{nodeName}`, `{configName}`, `{id}`) are
 *   placeholders stored verbatim; presumably the i18n layer substitutes
 *   them at render time (verify against the consumer).
 * - `dataSourceInfo` and `executionStatus` are nested groups; every other
 *   entry is a flat string.
 * - Built-in tool names use keys ending in `_internal`; the matching
 *   description uses the same key with a `_dec` suffix.
 * - Two historically misspelled keys (`executionStatus.wainting`,
 *   `peratorTip`) are kept for existing lookups, and correctly spelled
 *   aliases are provided next to them.
 */
export const dataPipelines = {
  "toSel": "请选择",
  "toInput": "请输入",
  "noData": "暂无数据",
  "saveConfiguration": "保存配置",
  "dataAcquisition": "数据采集",
  "dataCollectionTask": "数据采集任务",
  "dataSourceManagement": "数据源管理",
  "formatConversion": "格式转换",
  "dataFormatConversion": "数据格式转换",
  "taskDescription": "任务描述",
  "sourceFormat": "源格式",
  "targetFormat": "目标格式",
  "dataFlowBranch": "数据流向分支",
  "startExecution": "开始执行",
  "searchTaskName": "搜索任务名称",
  "confirmTermination": "确认终止",
  "terminate": "终止",
  "waiting": "等待中",
  "error": "错误",
  "taskStatus": "任务状态",
  "labelStudio": "数据标注",
  // Title/description pairs, one entry per data source kind.
  "dataSourceInfo": {
    "Mysql": {
      "title": "关系型数据库(MySQL)",
      "desc": "批量导入数据库表，支持自定义表、字段",
    },
    "Mongodb": {
      "title": "非关系型数据库(MongoDB)",
      "desc": "导入非关系型数据，支持集合、字段选择和结构转换",
    },
    "File": {
      "title": "文件数据导入",
      "desc": "支持CSV、Excel、JSON等多种格式文件导入",
    },
    "Hive": {
      "title": "Hive系统导入",
      "desc": "高效读取hive系统中存储的数据",
    },
  },
  "testingConnection": "正在测试连接",
  "pleaseSelectAnExecutionTime": "请选择执行时间",
  "deletingTask": "正在删除任务",
  "terminatingTask": "正在终止任务",
  "addDataSource": "添加数据源",
  "fileFormat": "文件格式",
  "connectionStatus": "连接状态",
  "dataSourceType": "数据源类型",
  "searchDataSources": "搜索数据源",
  "searchNameOrDescription": "搜索名称",
  "dataProcessing": "数据处理",
  "dataProcessingConfiguration": "数据处理配置",
  "taskFlowConfiguration": "任务流配置",
  "dataExportConfiguration": "数据导出配置",
  "taskExecuted": "任务已执行",
  "taskExecutionFailed": "任务执行失败",
  "cannotCancel": "任务处理中，无法取消",
  "taskSuccessStop": "任务已成功停止",
  "taskStopFailed": "任务停止失败",
  "processingResult": "处理结果",
  "algorithmTemplate": "算法模板",
  "builtInTemplate": "内置模板",
  "customTemplate": "自定义模板",
  "operatorManagement": "算子管理",
  "systemDashboard": "系统仪表盘",
  "concurrentTaskMonitoring": "并发任务监控",
  "myAlgorithmTemplate": "我的算法模板",
  "createAlgorithmTemplate": "创建算法模板",
  "dataProcessingDescription": "数据处理可支持用户使用不同的模型算子，针对大模型所用的数据进行处理，包括数据清洗、自动数据增强及分析等处理方式，用户可通过数据处理来获取更高质量的数据",
  "nodeName": "节点名称",
  "nodeConfig": "节点配置",
  "nodeType": "节点类型",
  "nodeNotSelected": "未选择节点",
  "fieldRequired": "{field}不能为空",
  "noNodesError": "工作流中未存在任何节点", // No nodes exist in the workflow
  "unnamedNode": "未命名节点({id})", // Unnamed node ({id})
  "unconnectedNodeError": "{nodeName}节点：尚未连接任何其他节点", // {nodeName} node: not connected to any other node yet
  "invalidConfigError": "{nodeName}节点的配置未填写完整，请检查必填项", // {nodeName} node's configuration is incomplete; check the required fields
  "configRequiredError": "{nodeName}节点的【{configName}】是必填项，请填写完整",
  "saveError": "保存工作流时出错", // Error while saving the workflow
  "noMatchingNodeFound": "未找到匹配节点",
  "searchProcessing": "搜索处理任务",
  "zoomIn": "放大",
  "zoomOut": "缩小",
  "resetView": "重置视图",
  "clearCanvas": "清除画布",
  "operationGuide": "操作指南",
  "operationGuide1": "从左侧拖拽节点到右侧画布区域",
  "operationGuide2": "点击节点上的连接点并拖动到另一个节点来创建连接",
  "operationGuide3": "拖拽节点可以调整位置",
  "operationGuide4": "鼠标悬停节点显示删除按钮",
  "operationGuide5": "点击删除按钮或按Delete键删除选中节点",
  "configInfo": "配置信息",
  "search": "搜索",
  "loading": "加载中",
  "taskCategories": "任务分类",
  "allCategories": "全部分类",
  "createTask": "创建任务",
  "taskList": "任务列表",
  "taskName": "任务名称",
  "DatabaseName": "数据库名称",
  "ServerAddress": "服务器地址",
  "port": "端口号",
  "username": "用户名",
  "password": "密码",
  "authType": "鉴权类型",
  "collectionSourceName": "采集源名称",
  "server": "服务器",
  "database": "数据库",
  "task": "任务",
  "dataSourceDetails": "数据源详情",
  "close": "关闭",
  "operationSuccessful": "操作成功",
  "operationFailed": "操作失败",
  "basicInformation": "基本信息",
  "dataSourceName": "数据源名称",
  "lastUpdate": "最后更新",
  "normal": "正常",
  "toBeTested": "待测试",
  "anomaly": "异常",
  "useRecord": "使用记录",
  "dataImportTask": "数据导入任务",
  "persons": "个",
  "recentlyUsed": "最近使用",
  "dataVolume": "数据量",
  "total": "总",
  "startAt": "开始于",
  "done": "完成",
  "taskRunningHost": "任务运行主机",
  "recordsHaveBeenImported": "已导入记录",
  "totalNumberOfRecords": "总记录数",
  "cancelTask": "取消任务",
  "refreshStatus": "刷新状态",
  "viewLog": "查看日志",
  "resourceOccupation": "资源占用",
  "RunItAgain": "重新运行",
  "dataConnectionConfiguration": "数据连接配置",
  "authType_option_NONE": "不做身份校验",
  "authType_option_LDAP": "使用基于 LDAP/AD 的用户身份校验",
  "authType_option_KERBEROS": "使用 Kerberos/GSSAPI 做身份校验",
  "authType_placeholder": "目前只支持 LDAP 模式",
  "collectionSourceDesc": "采集源描述",
  "testLink": "测试连接",
  "dataFilteringConfiguration": "数据筛选配置",
  "selectionSet": "选择集合",
  "searchForTheTableName": "搜索表名称",
  "allFields": "全部字段",
  "selectAll": "全选",
  "saveTheConfiguration": "保存配置",
  "saveAndExecute": "保存并执行",
  "executeImmediately": "立即执行",
  "selectTheExecutionTime": "选择执行时间",
  "sure": "确定",
  "PleaseSelectTime": "请选择时间",
  "fileUpload": "文件上传",
  "jumpLink": "跳转链接",
  "linkSuccess": "连接成功",
  "linkError": "连接失败",
  "connectionInformation": "连接信息",
  "TaskFailed": "任务失败",
  "manualStop": "手动停止",
  "public": "公开",
  "private": "私有",

  "createTime": "创建时间",
  "dataAmount": "数据量",
  "finishTime": "完成时间",
  "processedDataAmount": "已处理数据量",
  "processInfo": "处理详情",
  "processStatus": "运行状态",
  "processedData": "已处理数据",
  "graphicDemonstration": "图形化演示",
  "sessionProcessedResult": "Session处理结果",
  "index": "序号",
  "preSession": "处理前Session",
  "processType": "处理方式",
  "afterSession": "处理后Session",
  "taskLog": "任务日志",
  "logName": "日志名称",
  "downloadLog": "下载日志",
  "others": "其他",
  "replace": "替换",
  "deduplicate": "去重",
  "remove": "删除",
  "data_refine": "数据处理",
  "Internal": "内部",
  "data_generation": "数据生成",
  "data_enhancement": "数据增强",

  "data_source": "数据源",
  "execution_completed_normally": "执行结束（正常）",
  "execution_end_error": "执行结束（错误）",
  "stopped": "已停止",
  "celery_node_service_list": "Celery 节点服务列表",
  "ip_address": "IP 地址",
  "current_number_tasks": "当前任务数",
  "node_status": "节点状态",
  "heartbeat_time": "心跳时间",

  "taskType": "任务类型",
  "dataCleaning": "数据清洗",
  "processingStatus": "处理状态",
  "processingText": "处理字段",
  "inProgress": "处理中",
  "completed": "已完成",
  "dataSource": "数据来源",
  "dataSourceBranch": "数据来源分支",
  "dataFlow": "数据流向",
  "startTime": "开始时间",
  "endTime": "结束时间",
  "executionStatus": {
    "success": "执行完成",
    "error": "执行失败",
    "processing": "正在执行",
    // Correctly spelled key; the misspelled "wainting" below is retained so
    // lookups that already use it keep resolving.
    "waiting": "等待执行",
    "wainting": "等待执行",
  },
  "unknown": "未知",
  "online": "在线",
  "offline": "离线",
  "operations": "操作",
  "delete": "删除",
  "deleteConfirm": "确认删除",
  "cancelConfirm": "确认取消",
  "rerunItConfirm": "确认重新运行",
  "execute": "执行",
  "cancelExecute": "取消执行",
  "executeConfirm": "确认执行",
  "confirm": "确认",
  // Was "替换" (the text of the "replace" entry); "重置" matches "resetView".
  "reset": "重置",
  "details": "详情",
  "authorize": "授权",
  "settings": "设置",
  "operatorAuthorization": "算子授权",
  "SearchUserName": "搜索用户名称",
  "SearchOrganizationName": "搜索组织名称",
  "person": "个人",
  "organization": "组织",
  "selected": "已选",
  "editIcon": "编辑图标",
  "iconPreview": "图标预览",
  "please": "请",
  "uploadTips1": "支持JPG、PNG格式图标，最大不超过10MB",
  "uploadTips2": "已上传新图标，可继续上传替换或点击确定保存",
  "uploadIcon": "上传图标",
  "uploadStatusTips1": "点击或拖拽图标到此处",
  "uploadStatusTips2": "上传后将替换当前图标",
  "uploading": "正在上传",
  "uploadSuccess": "上传成功",
  "uploadSuccessTips1": "可继续上传替换或点击确定保存",
  "reUpload": "重新上传",
  "uploadFailed": "上传失败",
  "retry": "重试",
  "uploadSuccessTips2": "图标上传成功",
  "uploadFailedTips1": "请上传JPG或PNG格式的图标",
  "uploadFailedTips2": "图标大小不能超过10MB",
  "uploadFailedTips3": "上传失败，请重试",
  "networkError": "网络错误，请检查连接后重试",
  "submitting": "保存中",
  "algorithmTemplateDescription": "算法模版可支持用户使用多种不同的模型算子组成工作流，完成数据清洗、自动数据增强及分析等工作。",
  "taskTemplate": "任务模板",
  "templateName": "模板名称",
  "templateDescription": "模板描述",
  "searchTaskTemplate": "搜索任务模板",
  "searchTemplate": "搜索模板",
  "searchOperator": "搜索算子",
  "nextStep": "下一步",
  "previousStep": "上一步",
  "create": "创建",
  "edit": "修改",
  "type": "类型",
  "copy": "复制",
  "use": "使用",
  "templateList": "模板列表",
  "createTemplate": "创建模板",
  "editTemplate": "修改模板",
  "general": "通用",
  "dataCleaningDescription": "通过去重、去敏等多种算子，清洗数据，使数据满足使用需求",
  "dataAugmentation": "数据增强",
  "dataAugmentationDescription": "基于种子数据自动化生成更多数据，可用于训练数据生成，支持自定义参数及Prompt",
  "textClassification": "文本分类",
  "textClassificationDescription": "增强文本分类任务的训练数据，适用于情感分类、标签分类、商品分类等场景",
  "textExtraction": "文本抽取",
  "textExtractionDescription": "增强文本抽取类任务的训练数据，适用于特定格式抽取、实体抽取、要素提取等场景",
  "textGeneration": "文本创作",
  "textGenerationDescription": "增强文本创作类任务的训练数据，适用于新闻写作、广告稿生成、写作内容风格化等场景",
  "apply": "使用",
  "newTask": "新建任务",
  "pushToOriginalDataset": "推送到原数据集",
  "pushToOriginalDatasetDescription": "推送到原数据集后，将以新提交的方式推送到原始数据集repo中",
  "pushToNewDataset": "推送到新数据集",
  "pushToSelectedDatasetDescription": "数据清洗完成后，将推送到所选数据集",
  "targetDataset": "目标数据集名称",
  "predefinedOperatorSelection": "预置算子选择",
  "predefinedOperator": "预置算子",
  // Correctly spelled alias; the misspelled "peratorTip" below is retained
  // so lookups that already use it keep resolving.
  "operatorTip": "目前支持多种 Mapper、Filter、Deduplicator 类型的预置算子",
  "peratorTip": "目前支持多种 Mapper、Filter、Deduplicator 类型的预置算子",
  "publishAsNewTemplate": "发布为新模版",
  "executionOrder": "执行顺序",
  "enableOrNot": "是否开启",
  "addOperator": "添加算子",
  "operatorType": "算子类型",
  "operatorName": "算子名称",
  "textNormalization": "文本标准化",
  "removeSpecialContent": "特殊内容移除",
  "maskSensitiveInformation": "敏感信息打码",
  "specialCharacterRatioFiltering": "特殊字符占比过滤",
  "sensitiveWordFiltering": "敏感词过滤",
  "nGramRepetitionRatioFiltering": "N-Gram重复比率过滤",
  "lengthFiltering": "长度过滤",
  "md5Deduplication": "MD5去重",
  "articleSimilarityDeduplication": "文章相似度去重",
  "toxicityRemoval": "毒性去除",
  "operatorConfiguration": "算子配置",
  "unicodeTextNormalization": "Unicode文本标准化",
  "convertTraditionalChineseToSimplifiedChinese": "繁体转简体",
  "removeURLLinks": "去除URL链接",
  "removeInvisibleCharacters": "去除不可见字符",
  "removeHtmlTagsAndParseHtmlContent": "去除html格式字符并解析出html文本",
  "maximumRatio": "比例最大值",
  "lengthN": "长度N",
  "minimumLength": "长度最小值",
  "characters": "字符",
  "windowLength": "窗口长度",
  "description": "描述",
  // Was "繁体转中文"; aligned with the operator's own option text
  // ("convertTraditionalChineseToSimplifiedChinese": "繁体转简体").
  "textNormalizationDesc": "文本Unicode标准化和繁体转简体",
  "removeSpecialContentDesc": "移除文本中的特殊内容，例如文章中的url、不可见字符、html格式字符等",
  "maskSensitiveInformationDesc": "将敏感信息打码，例如将邮箱地址字符替换成[EMAIL]，手机电话号码替换成[TELEPHONE]或[MOBILEPHONE]，身份证号码替换成[IDNUM]",
  "specialCharacterRatioFilteringDesc": "根据特殊字符占比过滤文本，保留特殊字符个数占文本总长度比例不超过设定阈值的样本，特殊字符包括标点符号，数字，空格符号，emoji表情包等，超过设定比例的数据样本将被过滤",
  "sensitiveWordFilteringDesc": "过滤掉带有敏感词的样本",
  "nGramRepetitionRatioFilteringDesc": "保留字符级N-Gram重复比率不超过设定阈值的样本，超过阈值的样本将被过滤",
  "lengthFilteringDesc": "根据文本长度过滤数据，长度范围之外的数据将被过滤",
  "md5DeduplicationDesc": "根据文本生成的MD5值对比去重，MD5校验一致的样本将被过滤",
  "articleSimilarityDeduplicationDesc": "使用SimHash算法计算文本间的相似度，相似度超过阈值样本将被过滤",
  "toxicityRemovalDesc": "自动检测分析并去除数据中敏感、不合规的内容，本算子仅对数据内容进行分析、处理，不保存、保留任何处理前、处理后的数据内容",
  "previewBefore": "效果预览（清洗前）",
  "previewAfter": "效果预览（清洗后）",
  "creationCompleted": "创建完成",
  "updateTemplate": "更新模版",
  "cancel": "取消",
  "templateNameExists": "模板名称已存在，请使用其他名称",
  "Queued": "待处理",
  "Processing": "处理中",
  "Finished": "已完成",
  "Failed": "失败",
  "Timeout": "超时",
  "Canceled": "已取消",
  "sessionDel": "Session已删除",

  "toolsTit": "工具池",
  "toolsDec": "Dataflow 工具池是一个一站式多模态数据处理系统，可使数据质量更高、更有价值、更适合大模型处理。",
  "toolsSearch": "搜索工具",
  "toolsType": "工具分类",
  "toolsName": "工具名称",
  "toolsUse": "使用工具",
  "taskType1": "算子",
  "taskType2": "工具",
  "log": "日志",
  "toolsTab1": "内部工具",
  "toolsTab2": "外部工具",

  // Display names of the built-in tools (keys end in "_internal").
  "analysis_common_internal": "通用分析工具",
  "dataset_spliter_by_language_preprocess_internal": "数据集按语言分割预处理工具",
  "prepare_dataset_from_repo_preprocess_internal": "从代码仓库准备数据集预处理工具",
  "raw_alpaca_cot_merge_add_meta_preprocess_internal": "原始Alpaca-Cot数据合并与元数据添加预处理工具",
  "raw_arxiv_to_jsonl_preprocess_internal": "原始arXiv数据转换为JSONL预处理工具",
  "raw_stackexchange_to_jsonl_preprocess_internal": "原始Stack Exchange数据转换为JSONL预处理工具",
  "reformat_csv_nan_value_preprocess_internal": "CSV文件NaN值重格式化预处理工具",
  "reformat_jsonl_nan_value_preprocess_internal": "JSONL文件NaN值重格式化预处理工具",
  "serialize_meta_preprocess_internal": "元数据序列化预处理工具",
  "count_token_postprocess_internal": "令牌计数后处理工具",
  "data_mixture_postprocess_internal": "数据混合后处理工具",
  "deserialize_meta_postprocess_internal": "元数据反序列化后处理工具",
  "quality_classifier_common_internal": "质量分类器通用",
  "opencsg_data_extraction_preprocess_internal": "开放计算系统数据提取预处理",
  "opencsg_scrape_url_data_preprocess_internal": "开放计算系统抓取 URL 数据预处理",
  "fineweb_edu_chinese_common_internal": "文本价值评估",
  "smoltalk_chinese_common_internal": "高质量对话生成",
  "cosmopedia_chinese_preprocess_internal": "增强文本描述工具",

  // Descriptions of the built-in tools (tool key + "_dec").
  "analysis_common_internal_dec": "此分析器类用于分析特定数据集。它会为配置文件中的所有过滤操作计算统计数据，对这些统计数据应用多种分析（如整体分析、逐列分析等），并生成分析结果（统计表、分布图等），帮助用户更好地理解输入数据集。",
  "dataset_spliter_by_language_preprocess_internal_dec": "从源目录加载数据集，然后使用名为 LanguageIDScoreFilter 的操作过滤器进行语言识别，最后按语言分割数据集并保存。",
  "prepare_dataset_from_repo_preprocess_internal_dec": "从代码仓库中准备数据集，格式包括：仓库名称、仓库中的文件路径、文件内容。",
  "raw_alpaca_cot_merge_add_meta_preprocess_internal_dec": "将从Hugging Face下载的原始Alpaca-Cot数据转换为JSONL文件，合并指令/输入/输出文本，并添加元数据信息。",
  "raw_arxiv_to_jsonl_preprocess_internal_dec": "将原始arXiv数据（gzipped tar文件）转换为JSONL格式。",
  "raw_stackexchange_to_jsonl_preprocess_internal_dec": "将从Archive（参考：https://archive.org/download/stackexchange）下载的原始Stack Exchange数据转换为多个JSONL文件。",
  // A stray TAB character before "keep_default_na" was removed from this string.
  "reformat_csv_nan_value_preprocess_internal_dec": "使用Hugging Face加载可能包含NaN值的CSV或TSV文件，并可通过设置额外参数（如设置 keep_default_na 为False）进行处理。",
  "reformat_jsonl_nan_value_preprocess_internal_dec": "重格式化可能包含NaN值的JSONL文件。遍历JSONL文件，找到第一个不包含NaN的对象作为参考特征类型，并将其设置为加载所有JSONL文件时的基准。",
  "serialize_meta_preprocess_internal_dec": "序列化JSONL文件中除用户指定字段以外的所有字段，确保即使JSONL文件中每行文本格式不一致，数据集仍可正常加载。",
  "count_token_postprocess_internal_dec": "统计给定数据集和分词器的令牌数量。目前仅支持JSONL格式。",
  "data_mixture_postprocess_internal_dec": "将多个数据集混合成一个数据集。随机选择每个数据集的样本并混合这些样本，然后导出为新的混合数据集。支持的格式包括：[“jsonl”, “json”, “parquet”]。",
  "deserialize_meta_postprocess_internal_dec": "对JSONL文件中的指定字段进行反序列化处理。",
  "quality_classifier_common_internal_dec": "本质量分类器类用于预测数据集中文档的评分。它将计算所有行的分数，并为每一行提供两列：分数（score）和是否保留（should_keep），以帮助用户决定应该删除哪一行。默认情况下，如果分数高于 0.9，则将该行标记为 should_keep=1。",
  "opencsg_data_extraction_preprocess_internal_dec": "一个高质量的工具，用于将 PDF 转换为 Markdown 和 JSON",
  "opencsg_scrape_url_data_preprocess_internal_dec": "基于大型语言模型的网站和本地文档（XML、HTML、JSON 等）的数据抓取工具",
  "fineweb_edu_chinese_common_internal_dec": "用户可以定义自己的评分标准，根据这些标准对数据源中的数据进行评分，并过滤数据。最高分是5分。",
  "smoltalk_chinese_common_internal_dec": "使用固定的system_prompt生成具有大型模型的相关多轮对话并对其进行评分。根据用户指定的分数过滤数据，只保留分数最高的数据。",
  "cosmopedia_chinese_preprocess_internal_dec": "使用MakeCosmopediaMapper操作符将原始文本转换为WikiHow样式的详细教程。此工具调用大型语言模型，根据输入的种子文本生成结构化教程内容。",
}
